# Balance table comparing other homes to homes in main sample
pacman::p_load(tidyverse, qs, sf, modelsummary, data.table, fst, kableExtra, tinytable)

source("code/globals.R")

# Read the analysis dataset from the working directory
data <- qread(file.path(WORKING, "analysisdataset.qs"))

# Restrict to rows flagged `sample_main`, tag them as the "Main" sample, and
# store FIPS as a zero-padded 5-character string
regs <- mutate(
  filter(data, sample_main),
  sample = "Main",
  FIPS = sprintf("%05d", FIPS)
)

# Load CA at-risk sample as a data.table.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
allCA <- read_fst(
  file.path(WORKING, "statewide-costeff-sample.fst"),
  as.data.table = TRUE
)

# Tag every row as the "All" sample and store the ZIP code as a zero-padded
# 5-character string
allCA <- allCA %>%
  mutate(sample = "All",
         PropertyZip = sprintf("%05d", PropertyZip))

# CA at-risk homes in code areas: regime is one of "sra", "lraYY", "lraYN",
# or "lraNY". Rows are relabelled so they form their own group when stacked.
allCA_codeareas <- mutate(
  filter(allCA, regime %in% c("sra", "lraYY", "lraYN", "lraNY")),
  sample = "Code areas"
)

# CA at-risk homes in SRA areas only (a subset of the code-area rows above)
allCA_sra <- mutate(
  filter(allCA, regime == "sra"),
  sample = "SRA"
)

# Stack the four samples into one long data frame. The CA subsets are nested
# (SRA within code areas within all), so the same home can appear in several
# samples; the `sample` column tells the copies apart.
all_samples <- bind_rows(regs, allCA, allCA_codeareas, allCA_sra)

# Build the comparison data: rescale wildfire hazard by 100, turn the sample
# label into an ordered factor (this fixes the column order of the balance
# table), then keep only the variables to be compared.
bal_df <- all_samples %>%
  mutate(
    wildfire_hazard_pct = wildfire_hazard * 100,
    Sample = factor(sample, levels = c("Main", "All", "Code areas", "SRA"))
  ) %>%
  dplyr::select(
    combinedyear, slope, LotSizeAcres, sqfeet1000, TotalBedrooms, elev,
    wildfire_hazard_pct, Sample
  )

# Balance table: summary statistics of every variable in `bal_df` by Sample,
# returned as a plain data frame. `dinm = FALSE` drops the
# difference-in-means columns (only computed for two groups anyway).
out_df <- datasummary_balance(~ Sample, bal_df, dinm = FALSE,
                              fmt = fmt_significant(2),
                              output = "data.frame")

out_df

# Rename variables using data dictionary. A variable without an entry in
# `dict_names` is looked up as NA and would silently show up as an "NA" row
# label in the final table, so report any such variables explicitly.
var_names <- as.character(out_df[[" "]])
var_labels <- dict_names[var_names]
if (anyNA(var_labels)) {
  warning(
    "No entry in dict_names for: ",
    paste(var_names[is.na(var_labels)], collapse = ", "),
    call. = FALSE
  )
}

out_df <- out_df %>%
  mutate(` ` = dict_names[as.character(` `)])

# Rename columns. The new names are assigned by position, so check that the
# table has the expected layout (label column + mean/SD for each of the four
# samples) before overwriting the names.
new_names <- c(" ", "main_mean", "main_sd", "at_risk_mean", "at_risk_sd",
               "code_areas_mean", "code_areas_sd", "sra_mean", "sra_sd")
if (ncol(out_df) != length(new_names)) {
  stop(
    "Balance table has ", ncol(out_df), " columns; expected ",
    length(new_names), ".",
    call. = FALSE
  )
}
names(out_df) <- new_names

# Combine mean and SD into single "mean (sd)" columns. The main-sample column
# is named with two spaces so that its header prints blank while staying
# distinct from the one-space label column.
out_df <- out_df %>%
  transmute(
    ` `,
    `  ` = sprintf("%s (%s)", main_mean, main_sd),
    `All` = sprintf("%s (%s)", at_risk_mean, at_risk_sd),
    `Code areas` = sprintf("%s (%s)", code_areas_mean, code_areas_sd),
    SRA = sprintf("%s (%s)", sra_mean, sra_sd)
  )

# Report number of observations for text
all_samples %>% count(sample)

# Render the table as LaTeX and write it to the results directory.
# `TRUE`/`FALSE` are spelled out because `T`/`F` are reassignable variables.
# escape = FALSE passes cell and header text to LaTeX unescaped, so the row
# labels must already be valid LaTeX.
kbl(out_df,
    format = "latex",
    escape = FALSE,
    booktabs = TRUE,
    align = "lcccc",
    linesep = "") %>%
  # Group header: blank over the label column, "Main" over the main-sample
  # column, and one spanning header over the three California columns
  add_header_above(c(" " = 1, "Main" = 1, "At-Risk California Homes" = 3)) %>%
  write(file.path(RES, "compare-to-other-homes.tex"))
